# This file contains the S-plus code that was used to implement the estimators and functionals in "Semiparametric Smoothing of Discrete Failure Time Data", Biometrical Journal, (54): 5-19, (2012)
# As a fully working example the code herein can reproduce Figure 1 of the paper (the relevant data are also given below). 
# The same functions (with different input data) can be used in reproducing other figures of the same paper 
# More general use of the estimates is also feasible by supplying the corresponding data set.  Use of the functions is self - explanatory and is further facilitated by the comments at he preamble of each function.

#N.B. Compatibility with R is not guaranteed as the functions were not written for use with R - however, with minimal modifications, their use in the R environment should be feasible.

options(echo=FALSE)
remove(objects())
############################################################
#
# obtain non-smooth (MLE) hazard rate estimate
#
############################################################
lambdahat<-function(xin,  cens, xout)
#non smooth estimate of the discrete hazard function
{
	n<-length(xout)
	mat<-sapply(1:n, function(i, xout, xin,  cens)
	{
		xoutuse<-xout[i]
		whichtk<-which(xoutuse == xin) ## index of those T_j = t_k
		xinuse<-xin[whichtk]	
		deltause<-cens[whichtk]	
		num<- length(which(deltause !=	0))	#equivalent to \sum_{i=1}^n I(T_i = t_k)\Delta_i
		den<-length(xinuse) 
		c(num, den)
	}, xout, xin,  cens)
	mat<-matrix(mat, ncol=2, byrow=T)
	
  numerator<-mat[,1]
  denuse<-mat[,2]
  m <- length(denuse)
  denominator<-sapply(1:m,  function(j, denuse, m) sum(denuse[j:m]), denuse=denuse, m=m) #denominator: \sum_{j \geq k} \sum_{k=1}^n I(T_j=t_k)
  numerator/(denominator+1)
}


MLEHazardCovar<-function(xin,  xinCov,  cens, xout)
#returns MLE (non smooth) HRE over covariate levels
{
	xinCovDistinct <- unique(xinCov)
	n<-length(xinCovDistinct)
	
	result<-matrix(nrow=length(xout), ncol=n)
	for(i in 1:n)
	{
		whichvl<-which(xinCov== xinCovDistinct[i]) ## index of those V_j = u_l
		xinuse<-xin[whichvl]	 # select the T_j's for which I(V_j=v_l)=1
		
		deltause<-cens[whichvl]	# and the corresponding \Delta_j
		result[,i]<-lambdahat(xinuse, deltause, xout) # on the selected data apply the original lambdahat function
	}
	#cat(result)
	result #each column is a level of the covariate
}
############################################################
#
# end non-smooth (MLE) hazard rate estimate
# Begin covariate matrix functions: Gamma (smoothing matrix) and power
#
############################################################

power.matrix <- function(M, n){
  #Raise matrix M to the n'th power
        powers <- base(n, 2)
        result <- diag( nrow(M) )
        for(i in 1:length(powers)) {
                if(powers[i])
                        result <- result %*% M
                M <- M %*% M
        }
        result
}

base <- function(m, b){
  #Express m as powers of b; m may be vector
  maxi <- floor(log(max(m))/log(b))
  result <- matrix( 0, length(m), maxi+1 )
  for(i in 1:(maxi+1)) {
    result[,i] <- m %% b
    m <- m %/% b
  }
  result
}



GammaMatrix1<-function(r)
#  Gamma (smoothing over covariate) matrix with constant trend. r = number of covariates.

{
	Delta<- r/3# 1/(3*r)
	b1<- 1-Delta/r 
	bk<- 1-2*Delta/r 
	ak <- Delta/r 	
	Wmatrix<-diag(bk, nrow = r, ncol=r)	
	Wmatrix[col(Wmatrix) - row(Wmatrix)==1]<- ak 
	Wmatrix[ row(Wmatrix) - col(Wmatrix) ==1]<- ak
	Wmatrix[1,1]<-b1
	Wmatrix[r,r]<-b1

	Wmatrix
}

GammaMatrix2<-function(r)
# Gamma (smoothing over covariate) matrix with linear trend. r = number of covariates.
{
	den<-	r*(r+1)
	Delta<-  den/6
	b1<- 1-2*Delta/den 
	bk<- 1-4*Delta/den 
	ak <- 2*Delta/den 	
	Wmatrix<-diag(bk, nrow = r, ncol=r)
	Wmatrix[col(Wmatrix) - row(Wmatrix)==1]<-ak
	Wmatrix[ row(Wmatrix) - col(Wmatrix) ==1]<- ak
	Wmatrix[1,1]<-b1
	Wmatrix[r,r]<-b1

	Wmatrix
}

############################################################
#
# end covariate (smoothing and power) functions
# Begin smoothing of MLE estimate over covariate levels
#
############################################################

SmoothCovEstimate<-function(xin,  xinCov, cens, xout)
{
	xinCovDistinct<-unique(xinCov)
	NoOfVovariates = length(xinCovDistinct)
	estimate<-MLEHazardCovar(xin,  xinCov, cens, xout) #get MLE estimate. Matrix. Each column is a HRE for a covariate
	#	cat("\n", is.matrix(estimate))
#estimate<-matrix( VehicleHazardRate(xout, "poisson", 1.6,0), nrow=length(xout), ncol=4)
	matrixuse<-GammaMatrix1(NoOfVovariates ) #calculate Gamma matrix
	#cat("\n", matrixuse)
	SmoothGammaMatrix<- power.matrix(matrixuse, 4) # get smoothing matrix - optimal power missing!!!
	#cat("\n", SmoothGammaMatrix)

	#cat(nrow(estimate), " ", ncol(estimate), " ", nrow(matrixuse), " ", ncol(matrixuse))
	#exportData(data=estimate, file="file1.xls")
	

	smoothestimate<- estimate   %*%  SmoothGammaMatrix
	#exportData(data=smoothestimate, file="file2.xls")
	smoothestimate
}

############################################################
#
# end smoothing of MLE estimate over covariate
# Begin smoothing over time
#
############################################################

Tm<-function(tk, distribution, par1, par2)
# This is used to calculate max(t_k; 1-\sum_{l=0}^k \eta_l > \epsilon), \epseilon>0
{
	SumDistrTk<-PdfSwitch(xout, distribution, par1, par2)
	n<-length(SumDistrTk)
	SumTK<-sapply(1:n,  function(i, SumDistrTk, n) sum(SumDistrTk[i:n]), SumDistrTk=SumDistrTk, n=n) #\sum_{l=0}^n \eta_l
	Tmh<- 1-SumTK  #1-\sum_{l=0}^n \eta_l 

	maxTmh<-max(which(Tmh == max(Tmh)) )#Tmh[Tmh>= 0 & Tmh<= min(Tmh[Tmh>=0])] # max{1-\sum_{l=0}^n \eta_l >0}
	tk[maxTmh] # find the gridpoint tk which corresponds to Tmh, return the result.
}



CparamCalculation<-function(gamparam, VehHazard)
# returns the C smoothing parameter calculated as 
# C= gamma/max_{k \geq 0} ( \lambda(t_{k-1}) + \lambda(t_k) + \lambda(t_{k+1}) )
{
	VehHazard<-c(0, VehHazard) # \lambda(t_{-1}) is set to 0 here
	n<-length(VehHazard)-2
	Lsums<-sapply(1:n, function(i, VehHazard) sum(VehHazard[seq(i,i+2, by=1)]), VehHazard) #calculate \lambda(t_{k-1}) + \lambda(t_k) + \lambda(t_{k+1})
      
gamparam/max(Lsums) #return C
}

##################################################################################################################
##############alternative DetermineSCprod<-function(NonParEst, VehHazard, Hvector, n, gammapar) function #########

DetermineSCprod<-function(NonParEst, VehHazard, Hvector, n, gammapar)
#this finds SC = \gamma((n+1) \hat B_1)^{-1} \hat V_1
# n = number of obs, gammapar = sum of vehicle haz at xout (computed elsewhere)
{
	VehHazard<-c(0, VehHazard)
	NonParEst<-c(0, NonParEst)
	n<-length(VehHazard)-2
	B1hatElements<-sapply(1:n, function(i, VehHazard, NonParEst) 
				  ((NonParEst[i] + NonParEst[i+2])* VehHazard[i+1] - (VehHazard[i] + VehHazard[i+2])* NonParEst[i+1] )^2
				, VehHazard, NonParEst) #calculate (\hat \lambda(t_{k-1}) + \hat \lambda(t_{k+1})) \lambda(t_k) - ...)^2

   B1hat<-sum(B1hatElements)

   V1hatElements<-sapply(1:n, function(i, VehHazard, NonParEst) 
				  NonParEst[i+1]*(1-NonParEst[i+1]) *(VehHazard[i] + VehHazard[i+2])/Hvector[i]
									, VehHazard, NonParEst)
	
	V1hat<- sum(V1hatElements	)					
		gammapar *  V1hat / ((n+1)*B1hat)	#estimated optimal product for S and C
	
}

#######################################################################################################################

DetermineSCprod<-function(NonParEst, VehHazard, Hvector, nn, gammapar)
#this finds SC = \gamma((n+1) \hat B_1)^{-1} \hat V_1
# n = number of obs, gammapar = sum of vehicle haz at xout (computed elsewhere)
{
	VehHazard<-c(0, VehHazard)
	NonParEst<-c(0, NonParEst) 
	n<-length(VehHazard)-2
	B1hatElements<-sapply(1:n, function(i, VehHazard, NonParEst) 
						  ( sum(NonParEst[seq(i,i+2, by=2)])* VehHazard[(i+i+2)/2] - sum(VehHazard[seq(i,i+2, by=2)])* NonParEst[(i+i+2)/2])^2 
				, VehHazard, NonParEst) #calculate (\hat \lambda(t_{k-1}) + \hat \lambda(t_{k+1})) \lambda(t_k) - ...)^2

   B1hat<-sum(B1hatElements)
   V1hatElements<-sapply(1:n, function(i, VehHazard, NonParEst, Hvector){ 
						  NonParEst[(i+i+2)/2]*(1-NonParEst[(i+i+2)/2]) *sum(VehHazard[seq(i,i+2, by=2)])/Hvector[i]}
									, VehHazard, NonParEst, Hvector)
	V1hat<- sum(V1hatElements)
			(gammapar *  V1hat) / ((nn+1)*B1hat)	#estimated optimal product for S and C
}



SmoothedEstimate<-function(NonParEst, VehHazard, gammapar, SCproduct, Cpar)
{
	n<-length(VehHazard)-2 # maybe length(VehHazard)-2
	Sparam<- floor(SCproduct/Cpar) # S = S*C/C - this may need to change
	b0<- gammapar - Cpar*VehHazard[2] # b0 = \gamma - C \lambda_1 (counting starts at zero in the manuscript)
	bkplus1<- gammapar - Cpar * head(tail(VehHazard,2),1) # thisis the last value of the Bk vector (not specified in manuscript)
	ak <- Cpar * VehHazard
	bk <- sapply(1:n, function(i, VehHazard, Cpar, gammapar) 
				      { gammapar - Cpar * (VehHazard[i] + VehHazard[i+2])}, VehHazard, Cpar, gammapar)
	bk<- c(b0,bk, bkplus1)
	Wmatrix<-diag(bk)
		
	Wmatrix[col(Wmatrix) - row(Wmatrix)==1]<-head(ak, length(ak)-1)
	Wmatrix[ row(Wmatrix) - col(Wmatrix) ==1]<- tail(ak, length(ak)-1)
	LambdaMat<- (1/gammapar) * ( diag(1, nrow = length(VehHazard), ncol = length(VehHazard)) %*% Wmatrix)
	
	#################### test #################################
	#cat("\n Vehicle Hazard = ", VehHazard, "\n LAMBDA MAT = ", NonParEst %*% power.matrix(LambdaMat,200), "\ndiag= ", diag(power.matrix(LambdaMat,200)),  "\n")
	#aaaa<- NonParEst %*% power.matrix(LambdaMat,5)
	#plot(1:length(aaaa), VehHazard, type="l")
	#lines(1:length(aaaa), aaaa, lty=4)
	###########################################################
	
	if(Sparam == 0) LambdaMatrixSpower <- diag(1, nrow=length(VehHazard), ncol= length(VehHazard))
		else LambdaMatrixSpower <- power.matrix(LambdaMat, Sparam)
	
	SmoothEst<- matrix(NonParEst, nrow=1, ncol=length(VehHazard)) %*% LambdaMatrixSpower 
	SmoothEst

}

VehicleHazardRate<-function(xout, vehicledistr, param1, param2)
{
	switch(vehicledistr, binomial=dbinom(xout, param1, param2)/(1-pbinom(xout-1, param1, param2))
						  , geometric = dgeom(xout, param1)/(1-pgeom(xout-1, param1))
						  , poisson = dpois(xout, param1)/(1-ppois(xout-1, param1))
						  , negativebinomial = dnbinom(xout, param1, param2)/(1-pnbinom(xout-1, param1, param2))
						 
			)
}




SemiparamEst<-function(xin, cens, xout, xinCov, vehicledistr, vdparam1, vdparam2=0.5)
{
	#TkCutOff<-min(Tm(xout, Xdistr, Xpar1, Xpar2) , Tm(xout, Udistr, Upar1, Upar2), Tm(xout, vehicledistr, vdparam1, vdparam2))
  # cat(xout, " ", TkCutOff)
	#xout<-head(xout, TkCutOff) 
 	ColOut=2*length(unique(xinCov))
	result<-matrix(nrow=length(xout), ncol=ColOut/2)
	
	#Qvector <- 1-cumsum(PdfSwitch(xout-1, Xdistr, Xpar1, Xpar2))
	#Pvector <- 1-cumsum(PdfSwitch(xout-1, Udistr, Upar1, Upar2))

	#Hvector <-  Pvector * Qvector 
	NonparamEst<-SmoothCovEstimate(xin,  xinCov, cens, xout)
	VehicleHazard<-rep(0.5, length=length(xout))#VehicleHazardRate(xout, vehicledistr, vdparam1, vdparam2)
	cat(VehicleHazard)
	GammaParam <- 2 #sum(VehicleHazard)
	Cpar <-  0.5# CparamCalculation(GammaParam , VehicleHazard) #upper bound for C 0.5#
	SCproduct<- 15* Cpar # DetermineSCprod(NonparamEst, VehicleHazard, Hvector , length(xin), GammaParam)
	#cat("\nColOut= ", ColOut)
	for(i in 1:ColOut/2)
	{
 	 SmoothEst<- SmoothedEstimate(NonparamEst[,i], VehicleHazard, GammaParam , SCproduct, Cpar )	
	 #result[,2*i-1]<-xout
	 result[,i]<-SmoothEst
	}
	result
}

RdistSwitch<-function(dist, SampleSize, par1, par2=.5)
{
	switch(dist, binomial = rbinom(SampleSize, par1, par2)
				 , geometric = rgeom(SampleSize, par1)
				 , poisson = rpois(SampleSize, par1)
				 , negativebinomial = rnbinom(SampleSize, par1, par2)
				 , "uniform" = runif(SampleSize, par1, par2)    )
}


sterils<-importData("Prakash sterilis.xls")

xin <-sterils$time.interval

cens<-rep(1, times = length(xin))

xout<-seq(min(xin), max(xin), by=1)


xinCov<-sterils$operator.negligence

arg0<-MLEHazardCovar(xin,  xinCov, cens, xout)

arg2<-SemiparamEst(xin, cens, xout, xinCov, "geometric", .5, .5)
par(mfrow=c(2,3))

plot(sort(xout), arg2[,1], type="l", xlab="x", ylab="hazard rate")
plot(sort(xout), arg2[,2], type="l", xlab="x", ylab="hazard rate")
plot(sort(xout), arg2[,3], type="l", xlab="x", ylab="hazard rate")

col1<-c(arg2[,1], arg2[,2], arg2[,3])
col2<-rep(1:3, each=nrow(arg2))
col3<-rep(xout,3)

#coll1<-c(VehicleHazardRate(xout, "poisson", 5,0), VehicleHazardRate(xout, "poisson", 5,0), VehicleHazardRate(xout, "poisson", 5,0), VehicleHazardRate(xout, "poisson", 5,0))

newmat<-data.frame("covariate level"=col2,time=col3,"hazard rate"=col1)
guiPlot("3D Scatter", DataSetValues = newmat, NumConditioningVars=0)